{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src=https://upload.wikimedia.org/wikipedia/commons/thumb/1/11/TensorFlowLogo.svg/2000px-TensorFlowLogo.svg.png width=20% img>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Predicting the Number of Cylinders With <font color='orange'>Tensorflow 2.0</font>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir('c:/users/nicolas/documents/data/thecarconnection')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.layers import Dense\n",
    "from tensorflow.keras.models import Model\n",
    "tf.keras.backend.set_floatx('float64')\n",
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Loading the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv([i for i in os.listdir() if i.startswith('cleaned')][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols = ['Passenger Capacity', 'Passenger Doors', 'Front Shoulder Room (in)',\n",
    "        'Front Head Room (in)', 'Second Leg Room (in)', 'Front Leg Room (in)',\n",
    "        'Second Shoulder Room (in)', 'Second Head Room (in)',\n",
    "        'Height, Overall (in)', 'Wheelbase (in)', 'Width, Max w/o mirrors (in)',\n",
    "        'Fuel Tank Capacity, Approx (gal)', 'EPA Fuel Economy Est - Hwy (MPG)',\n",
    "        'EPA Fuel Economy Est - City (MPG)', 'Third Gear Ratio (:1)',\n",
    "        'First Gear Ratio (:1)', 'Fourth Gear Ratio (:1)',\n",
    "        'Second Gear Ratio (:1)', 'Front Brake Rotor Diam x Thickness (in)',\n",
    "        'Rear Brake Rotor Diam x Thickness (in)',\n",
    "        'Turning Diameter - Curb to Curb', 'Gears', 'Net Horsepower',\n",
    "        'Net Horsepower RPM', 'Net Torque', 'Net Torque RPM', 'Cylinders',\n",
    "        'Displacement (L)', 'Displacement (cc)', 'Rear Tire Width',\n",
    "        'Front Tire Width', 'Rear Wheel Size', 'Front Wheel Size', 'Tire Ratio']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[cols].sample(frac=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Encoding the input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Passenger Capacity</th>\n",
       "      <th>Passenger Doors</th>\n",
       "      <th>Front Shoulder Room (in)</th>\n",
       "      <th>Front Head Room (in)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>55.5</td>\n",
       "      <td>38.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>22.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>68.8</td>\n",
       "      <td>40.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>64.9</td>\n",
       "      <td>42.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>22.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>68.8</td>\n",
       "      <td>39.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>38.80</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Passenger Capacity  Passenger Doors  Front Shoulder Room (in)  \\\n",
       "0                 5.0              4.0                      55.5   \n",
       "1                22.0              3.0                      68.8   \n",
       "2                 7.0              4.0                      64.9   \n",
       "3                22.0              3.0                      68.8   \n",
       "4                 5.0              4.0                      58.0   \n",
       "\n",
       "   Front Head Room (in)  \n",
       "0                 38.60  \n",
       "1                 40.20  \n",
       "2                 42.80  \n",
       "3                 39.96  \n",
       "4                 38.80  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head().iloc[:, :4].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = df.drop('Cylinders', axis=1)\n",
    "y = df['Cylinders']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "le = LabelEncoder()\n",
    "y = le.fit_transform(y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Transforming the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "ms = MinMaxScaler()\n",
    "x = ms.fit_transform(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "xtrain, xtest, ytrain, ytest = train_test_split(x, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating an iterable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_ds = tf.data.Dataset.from_tensor_slices((xtrain, ytrain)).shuffle(25).batch(24)\n",
    "test_ds = tf.data.Dataset.from_tensor_slices((xtest, ytest)).batch(24)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating a neural net class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MyModel(Model):\n",
    "    def __init__(self):\n",
    "        super(MyModel, self).__init__()\n",
    "        self.d1 = Dense(64, activation='relu')\n",
    "        self.d2 = Dense(128, activation='relu')\n",
    "        self.d3 = Dense(256, activation='relu')\n",
    "        self.d4 = Dense(7, activation='softmax')\n",
    "        \n",
    "    def call(self, x):\n",
    "        x = self.d1(x)\n",
    "        x = self.d2(x)\n",
    "        x = self.d3(x)\n",
    "        x = self.d4(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = MyModel()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating loss and accuracy objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss_object = tf.keras.losses.SparseCategoricalCrossentropy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_acc = tf.keras.metrics.SparseCategoricalAccuracy()\n",
    "train_loss = tf.keras.metrics.Mean(name='train_loss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_acc = tf.keras.metrics.SparseCategoricalAccuracy()\n",
    "test_loss = tf.keras.metrics.Mean(name='test_loss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = tf.keras.optimizers.Adam()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def train_step(inputs, targets):\n",
    "    with tf.GradientTape() as tape:\n",
    "        predictions = model(inputs)\n",
    "        loss = loss_object(targets, predictions)\n",
    "    gradients = tape.gradient(loss, model.trainable_variables) \n",
    "    optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
    "    \n",
    "    train_loss(loss)\n",
    "    train_acc(targets, predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def test_step(inputs, targets):\n",
    "    predictions = model(inputs)\n",
    "    t_loss = loss_object(targets, predictions)\n",
    "    \n",
    "    test_loss(t_loss)\n",
    "    test_acc(targets, predictions)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Training the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1, Loss: 0.0159, Accuracy: 99.44%, Test Loss: 0.0220, Test Accuracy: 99.21%\n",
      "Epoch 2, Loss: 0.0164, Accuracy: 99.45%, Test Loss: 0.0197, Test Accuracy: 99.41%\n",
      "Epoch 3, Loss: 0.0176, Accuracy: 99.45%, Test Loss: 0.0156, Test Accuracy: 99.56%\n",
      "Epoch 4, Loss: 0.0126, Accuracy: 99.55%, Test Loss: 0.0150, Test Accuracy: 99.55%\n",
      "Epoch 5, Loss: 0.0139, Accuracy: 99.59%, Test Loss: 0.0186, Test Accuracy: 99.48%\n"
     ]
    }
   ],
   "source": [
    "for epoch in range(5):\n",
    "    for data, labels in train_ds:\n",
    "        train_step(data, labels)\n",
    "        \n",
    "    for test_data, test_labels in test_ds:\n",
    "        test_step(test_data, test_labels)\n",
    "        \n",
    "    template = 'Epoch {:d}, Loss: {:>6.4f}, Acc.: {:>5.2%}, '\\\n",
    "    'Test Loss: {:>6.4f}, Test Acc.: {:>5.2%}'\n",
    "\n",
    "    print(template.format(epoch+1,\n",
    "                        train_loss.result(),\n",
    "                        train_acc.result(),\n",
    "                        test_loss.result(),\n",
    "                        test_acc.result()))\n",
    "        \n",
    "    train_loss.reset_states()\n",
    "    train_acc.reset_states()\n",
    "    \n",
    "    test_loss.reset_states()\n",
    "    test_acc.reset_states()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
